﻿/* 
%include "/projects/programs/flin/201809/0_data/data_0_cityind_sum_core.sas" /source2;

Macro that aggregates data to the city-industry level.

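dt_in = name of the input data set; it must contain year, sector, ch_ind, firmnum, lbdid, worker, est, salary, indf_rank, indf_perc, runif and the variable named in c_city
dt_out = name of the output data set (one row per year, sector, industry and city)
c_city = name of the city/geography variable
l_ind_perc = space-separated list of percentiles to create statistics for (e.g. 10 for the top 10 percent of industry-firms, with the percentile defined on employment at the industry level)
l_cind_rank = space-separated list of ranks to create statistics for (e.g. 1 for the top firm in the city-industry)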
*/

%macro cind_sum(dt_in= , dt_out= , c_city= , l_ind_perc= , l_cind_rank= );
/*
Aggregate firm-level records to the year x sector x ch_ind x city level.

Parameters
  dt_in       input data set; must contain year, sector, ch_ind, firmnum, lbdid,
              worker, est, salary, indf_rank, indf_perc, runif and the variable
              named in c_city
  dt_out      output data set, one row per year x sector x ch_ind x city
  c_city      name of the city/geography variable
  l_ind_perc  list of percentiles P; a firm is in the top P percent when
              indf_perc <= P/100
  l_cind_rank list of ranks R; a firm is in the top R when its within
              city-industry rank (cindf_rank) is <= R

Output variables (sums over the firms of each city-industry)
  emp_cind est_cind pay_cind n_cind    employment, establishments, payroll, firm count
  emp2_cindf                           sum of squared firm employment
  emp_ind_P est_ind_P pay_ind_P n_ind_P        same totals, top-P-percent firms only
  emp_cind_tR est_cind_tR pay_cind_tR n_cind_tR  same totals, top-R firms only
  hhi_cind                             emp2_cindf / emp_cind**2

Side effects: creates/replaces the WORK data sets dt_proc and dt_indf.
*/

/* Keep every working macro variable local so that the %let statements below
   cannot overwrite same-named variables in the calling or global scope. */
%local i_list i_perc i_rank l_cind_var_0 l_ind_var l_cind_var;

%if %length(&dt_in.) = 0 or %length(&dt_out.) = 0 or %length(&c_city.) = 0 %then %do;
  %put ERROR: cind_sum: dt_in=, dt_out= and c_city= are required.;
  %return;
%end;

%put Top Industry Firms;
%put Input &dt_in.;
%put Output &dt_out.;
%put City &c_city.;
%put Percentile &l_ind_perc.;
%put Rank &l_cind_rank.;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry-city-firm level
*/

/* Subset the columns and sort in a single pass. */
proc sort data=&dt_in.(keep=year &c_city. ch_ind sector firmnum lbdid worker est salary indf_rank indf_perc runif)
          out=dt_proc;
  by year sector ch_ind &c_city. firmnum indf_rank indf_perc;
run;

proc means data=dt_proc noprint;
  by year sector ch_ind &c_city. firmnum indf_rank indf_perc;
  output out=dt_indf(drop=_type_ _freq_) sum(worker est salary)=emp_cind est_cind pay_cind mean(runif)=runif;
run;
/* Note that *_cind will become city-industry level values in the end; currently at city-ind-firm level */

/*
--------------------------------------------------------------------------------
Generate firm rank in city-industry and the top-firm variables (one pass)
*/

/* NOTE(review): a missing indf_rank sorts first and therefore receives
   cindf_rank 1; confirm that indf_rank is never missing in the input. */
proc sort data=dt_indf;
  by year &c_city. ch_ind indf_rank;
run;

%Let l_cind_var_0=emp_cind est_cind pay_cind n_cind emp2_cindf;
%Let l_ind_var= ;
%Let l_cind_var= ;

data dt_indf;
  set dt_indf;
  by year &c_city. ch_ind;

  n_cind = 1;                 /* sums to the number of firms */
  emp2_cindf = emp_cind**2;   /* For HHI */

  /* Running position of the firm inside its city-industry, ordered by indf_rank */
  if first.ch_ind then cindf_rank = 0;
  cindf_rank + 1;

  /*
  Variables related to top industry firms
  e.g. employment of top 10% industry-firms in the city (percentile based on
  employment and defined at industry level)
  */
  %Let i_list = 1;
  %do %while (%scan(%bquote(&l_ind_perc.), &i_list.) ~= );
    %Let i_perc=%scan(%bquote(&l_ind_perc.), &i_list.);
    %put Top &i_perc.% Industry Firms;

    /* SAS orders missing below every number, so a missing percentile has to be
       excluded explicitly or the firm would count as "top" for every threshold. */
    if not missing(indf_perc) and indf_perc <= %sysevalf(&i_perc. / 100) then
      do;
        emp_ind_&i_perc. = emp_cind;
        est_ind_&i_perc. = est_cind;
        pay_ind_&i_perc. = pay_cind;
        n_ind_&i_perc. = 1;
      end;
    else
      do;
        emp_ind_&i_perc. = 0;
        est_ind_&i_perc. = 0;
        pay_ind_&i_perc. = 0;
        n_ind_&i_perc. = 0;
      end;

    %Let l_ind_var = &l_ind_var. emp_ind_&i_perc. est_ind_&i_perc. pay_ind_&i_perc. n_ind_&i_perc.;
    %Let i_list = %eval(&i_list. + 1);
  %end;

  /*
  Variables related to top city-industry firms
  e.g. employment of top 1 city-industry firm in the city (rank based on
  employment and defined at city-industry level)
  */
  %Let i_list = 1;
  %do %while (%scan(%bquote(&l_cind_rank.), &i_list.) ~= );
    %Let i_rank=%scan(%bquote(&l_cind_rank.), &i_list.);
    %put Top &i_rank. City-Industry Firms;

    if cindf_rank <= &i_rank. then
      do;
        emp_cind_t&i_rank. = emp_cind;
        est_cind_t&i_rank. = est_cind;
        pay_cind_t&i_rank. = pay_cind;
        n_cind_t&i_rank. = 1;
      end;
    else
      do;
        emp_cind_t&i_rank. = 0;
        est_cind_t&i_rank. = 0;
        pay_cind_t&i_rank. = 0;
        n_cind_t&i_rank. = 0;
      end;

    %Let l_cind_var = &l_cind_var. emp_cind_t&i_rank. est_cind_t&i_rank. pay_cind_t&i_rank. n_cind_t&i_rank.;
    %Let i_list = %eval(&i_list. + 1);
  %end;
run;

%put List of Variables for Top Industry Firms;
%put &l_ind_var.;
%put List of Variables for Top City-Industry Firms;
%put &l_cind_var.;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry-city level
*/

proc sort data=dt_indf;
  by year sector ch_ind &c_city.;
run;

proc means data=dt_indf noprint;
  by year sector ch_ind &c_city.;
  output out=&dt_out.(drop=_type_ _freq_) sum(&l_cind_var_0. &l_ind_var. &l_cind_var.)=&l_cind_var_0. &l_ind_var. &l_cind_var.;
run;

/* Calculate HHI: sum of squared firm employment over squared total employment.
   Left missing when total employment is zero or missing, which avoids
   division-by-zero notes and _ERROR_ being set in the log. */
data &dt_out.;
  set &dt_out.;
  if not missing(emp_cind) and emp_cind ne 0 then hhi_cind = emp2_cindf / (emp_cind**2);
  else hhi_cind = .;
run;

%mend cind_sum;


/* End of SAS file */
